Preparations

Load the necessary libraries

library(rstanarm)   #for fitting models in STAN
library(brms)       #for fitting models in STAN
library(coda)       #for diagnostics
library(bayesplot)  #for diagnostics
library(ggmcmc)     #for MCMC diagnostics
library(DHARMa)     #for residual diagnostics
library(rstan)      #for interfacing with STAN
library(emmeans)    #for marginal means etc
library(broom)      #for tidying outputs
library(tidybayes)  #for more tidying outputs
library(ggeffects)  #for partial plots
library(tidyverse)  #for data wrangling etc
library(broom.mixed)#for summarising models
library(ggeffects)  #for partial effects plots
theme_set(theme_classic()) #put the default ggplot theme back

Scenario

Polis et al. (1998) were intested in modelling the presence/absence of lizards (Uta sp.) against the perimeter to area ratio of 19 islands in the Gulf of California.

Uta lizard

Format of polis.csv data file

island ratio pa
Bota 15.41 1
Cabeza 5.63 1
Cerraja 25.92 1
Coronadito 15.17 0
.. .. ..
island Categorical listing of the name of the 19 islands used - variable not used in analysis.
ratio Ratio of perimeter to area of the island.
pa Presence (1) or absence (0) of Uta lizards on island.

The aim of the analysis is to investigate the relationship between island parimeter to area ratio and the presence/absence of Uta lizards.

Read in the data

polis <- read_csv('../data/polis.csv', trim_ws=TRUE) %>%
  janitor::clean_names()
glimpse(polis)
## Rows: 19
## Columns: 3
## $ island <chr> "Bota", "Cabeza", "Cerraja", "Coronadito", "Flecha", "Gemelose…
## $ ratio  <dbl> 15.41, 5.63, 25.92, 15.17, 13.04, 18.85, 30.95, 22.87, 12.01, …
## $ pa     <dbl> 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1

Exploratory data analysis

Model formula: \[ \begin{align} y_i &\sim{} \mathcal{Bin}(n, p_i)\\ ln\left(\frac{p_i}{1-p_i}\right) &= \beta_0 + \beta_1 x_i\\ \beta_0 &\sim{} \mathcal{N}(0,10)\\ \beta_1 &\sim{} \mathcal{N}(0,1)\\ \end{align} \]

Fit the model

priors <- 
  prior(normal(0,10), class = "Intercept") +
  prior(normal(0,1), class = "b")
polis_brm1 <- brm(bf(pa|trials(1) ~ ratio, family = binomial()),
                  data = polis, 
                  prior = priors,
                  sample_prior = "only", # predictive prior distribution
                  iter = 5000, warmup = 1000, chains = 3, thin = 5, refresh = 0)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk   -fPIC  -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1

Note that for a binomial, you have to indicate the total number of trials (trials is a specific reserved word in this function!). Here, we just write trials(1) for the Bernoulli trials seen here (either presence or absence, rather than counts)

conditional_effects(polis_brm1) %>% plot(points = TRUE)

This shows us that our priors are not strong at all, in fact, they may actually be too weak!

We will skip a few models ahead of last time to fit the final model we saw last time…

polis_brm3 <- brm(bf(pa|trials(1) ~ ratio, family = binomial()),
                  data = polis, 
                  prior = priors,
                  sample_prior = "yes", # predictive prior distribution
                  iter = 5000, warmup = 1000, chains = 3, thin = 5, refresh = 0)
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.0/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk   -fPIC  -isysroot /Library/Developer/CommandLineTools/SDKs/MacOSX.sdk -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:88:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:1: error: unknown type name 'namespace'
## namespace Eigen {
## ^
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:613:16: error: expected ';' after top level declarator
## namespace Eigen {
##                ^
##                ;
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
## In file included from /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Dense:1:
## /Library/Frameworks/R.framework/Versions/4.0/Resources/library/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
## #include <complex>
##          ^~~~~~~~~
## 3 errors generated.
## make: *** [foo.o] Error 1
polis_brm3$fit %>% get_variables()
##  [1] "b_Intercept"     "b_ratio"         "prior_Intercept" "prior_b"        
##  [5] "lp__"            "accept_stat__"   "stepsize__"      "treedepth__"    
##  [9] "n_leapfrog__"    "divergent__"     "energy__"
polis_brm3 %>% hypothesis("ratio = 0") %>% plot()

Can see that the prior has not affected the posterior distribution, and thus were uninformative.

MCMC sampling diagnostics

mcmc_plot(polis_brm3, type='combo') # good mixing of chains

mcmc_plot(polis_brm3, type='acf_bar') # No autocorrelation

mcmc_plot(polis_brm3, type='rhat_hist') # Rhat less than 1.05

mcmc_plot(polis_brm3, type='neff_hist') # Neff greater than 0.5 or 50%

ggs_crosscorrelation(ggs(polis_brm3$fit)) # no unexpected cross-correlation

ggs_grb(ggs(polis_brm3$fit)) # scale reduction

Model validation

pp_check(polis_brm3, type = "dens_overlay", nsamples = 100)

pp_check(polis_brm3, x = "ratio", type = "intervals")

DHARMa residuals:

preds <- posterior_predict(polis_brm3, nsamples = 250, 
                           summary = FALSE)
polis_resids <- createDHARMa(
  simulatedResponse = t(preds),  # simulated predictions/expected values for each observation
  observedResponse = polis$pa, # true values
  fittedPredictedResponse = apply(preds, 2, median), # get median expected value for all data points
  integerResponse = TRUE) # type of distribution

plot(polis_resids)

Partial effects plots

polis_brm3 %>%
  conditional_effects() %>%
  plot(points = TRUE)

Model investigation

summary(polis_brm3)
##  Family: binomial 
##   Links: mu = logit 
## Formula: pa | trials(1) ~ ratio 
##    Data: polis (Number of observations: 19) 
## Samples: 3 chains, each with iter = 5000; warmup = 1000; thin = 5;
##          total post-warmup samples = 2400
## 
## Population-Level Effects: 
##           Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS Tail_ESS
## Intercept     4.55      2.00     1.31     9.18 1.00     1986     1935
## ratio        -0.28      0.12    -0.56    -0.09 1.00     1989     2253
## 
## Samples were drawn using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
(x <-tidyMCMC(polis_brm3$fit, estimate.method = "median",
         conf.int = T, conf.method = "HPDinterval",
         rhat = F, ess = F) %>% as.data.frame)

75.8094817 times as likely to be present rather than absent 98.6980774 % present vs. absent 98.3086951 % present vs. absent when ratio increases by 1 unit

Bayes R^2:

polis_brm3 %>%
  bayes_R2(summary = FALSE) %>%
  median_hdci()

Further analyses

What is the LD50?

LD50 = -Intercept/slope

polis_brm3$fit %>% get_variables
##  [1] "b_Intercept"     "b_ratio"         "prior_Intercept" "prior_b"        
##  [5] "lp__"            "accept_stat__"   "stepsize__"      "treedepth__"    
##  [9] "n_leapfrog__"    "divergent__"     "energy__"
LD50s <- polis_brm3$fit %>% 
  tidy_draws() %>%
  mutate(LD50 = -b_Intercept/b_ratio)
LD50_hpd <- LD50s %>% median_hdci(LD50) %>% janitor::clean_names()
  
LD50s %>%
  ggplot() +
  geom_histogram(aes(x = LD50)) +
  geom_vline(xintercept = c(LD50_hpd$lower,LD50_hpd$upper), 
             color = "red", linetype = "dashed") +
  geom_vline(xintercept = c(LD50_hpd$ld50), 
             color = "red") +
  scale_y_continuous(expand = expansion())

Summary figure

polis_grid <- with(polis, list(ratio = modelr::seq_range(ratio, n=100)))

newdata <- polis_brm3 %>% 
  emmeans(~ratio, at = polis_grid, type = "response") %>%
  as.data.frame() %>%
  rename(pa = prob, lwr = lower.HPD, upr = upper.HPD)

newdata %>% 
  ggplot(aes(x = ratio, y = pa)) +
  geom_ribbon(aes(ymin = lwr, ymax = upr), fill = "blue", alpha = 0.3) +
  geom_line() + 
  geom_point(data = polis) +
  labs(x = "Island ratio", y = "Presence vs. absence") +
  geom_vline(xintercept = c(LD50_hpd$lower,LD50_hpd$upper), 
             color = "red", linetype = "dashed") +
  geom_vline(xintercept = c(LD50_hpd$ld50), 
             color = "red") +
  scale_x_continuous(expand = expansion())

References

Polis, G. A., S. D. Hurd, C. D. Jackson, and F. Sanchez-Piñero. 1998. “Multifactor Population Limitation: Variable Spatial and Temporal Control of Spiders on Gulf of California Islands.” Ecology 79: 490–502.